{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Create by 2020-09-24\n",
    "\n",
    "@author: zly\n",
    "\"\"\"\n",
    "import numpy as np\n",
    "from numpy import *\n",
    "from time import sleep\n",
    "import matplotlib\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "# 加载数据集\n",
    "def loadDataSet(fileName):\n",
    "    # 初始化一个空列表\n",
    "    dataSet = []\n",
    "    # 读取文件\n",
    "    fr = open(fileName)\n",
    "    # 循环遍历文件所有行\n",
    "    for line in fr.readlines():\n",
    "        # 切割每一行的数据\n",
    "        curLine = line.strip().split('\\t')\n",
    "        # 将数据转换为浮点类型,便于后面的计算\n",
    "        # fltLine = [float(x) for x in curLine]\n",
    "        # 将数据追加到dataMat\n",
    "        fltLine = list(map(float,curLine))    # 映射所有的元素为 float（浮点数）类型\n",
    "        dataSet.append(fltLine)\n",
    "    # 返回dataMat\n",
    "    return dataSet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用欧式距离计算两个向量的距离\n",
    "def distEclud(vecA, vecB):\n",
    "    # 数组元素求平方\n",
    "    return sqrt(sum(power(vecA - vecB, 2)))\n",
    "\n",
    "# 构建一个包含k个随机质心的集合\n",
    "def randCent(dataMat, k):\n",
    "    # 获取样本数与特征值(80,2)\n",
    "    m, n = shape(dataMat)\n",
    "    # 初始化质心,创建(k,n)个以零填充的矩阵\n",
    "    centroids = mat(zeros((k, n)))\n",
    "    # 循环遍历特征值\n",
    "    for j in range(n):\n",
    "        '''\n",
    "        随机质心必须要整个数据集的边界之内\n",
    "        找到每个维的最大值和最小值，求出范围\n",
    "        然后生成0到1.0之间的随机数并通过最小值和取值范围，以便确保随机点在数据边界之内\n",
    "        '''\n",
    "        #计算每一列的最小值\n",
    "        minJ = min(dataMat[:, j])\n",
    "        #print(\"minJ\",minJ)\n",
    "        # 计算每一列的范围值\n",
    "        rangeJ = float(max(dataMat[:, j]) - minJ)\n",
    "        #print(\"rangeJ\",rangeJ)\n",
    "        # 计算每一列的质心,并将值赋给centroids\n",
    "        #print(\"mat(minJ + rangeJ * random.rand(k, 1))\",mat(minJ + rangeJ * random.rand(k, 1)))\n",
    "        #将最小值加上范围（max-min）*(3,1)矩阵\n",
    "        centroids[:, j] = mat(minJ + rangeJ * random.rand(k, 1))\n",
    "#         print('centroids[:, j]\\n',centroids[:, j])\n",
    "    #返回质心\n",
    "    return centroids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(80, 2)\n",
      "[[-5.379713]] [[4.838138]] [[-4.232586]] [[5.1904]]\n"
     ]
    }
   ],
   "source": [
    "dataMat=mat(loadDataSet('testSet.txt'))\n",
    "print(dataMat.shape)\n",
    "print(min(dataMat[:,0]),max(dataMat[:,0]),min(dataMat[:,1]),max(dataMat[:,1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAXmklEQVR4nO3df2jd53XH8c/JrT2Re1tmiKGlSeeG7o+FJrYaYaX2Hyl2G0WplqyQQrW6hPWPMFihJitdUydzI7E/QiDpHy0Ms40OGlRGl6mTqLBTVhhrFrWyrWTLso64JG3SlbiM0VlBVnV79od8HenqXut77/3+eJ7v9/2CC9a1dO9zf517vuc53+cxdxcAIF7XFT0AAMBgCOQAEDkCOQBEjkAOAJEjkANA5N5RxJ3ecMMNvm/fviLuGgCidfbs2V+6+9726wsJ5Pv27dPS0lIRdw0A0TKz1zpdT2kFACJHIAeAyBHIASByBHIAiByBHAAiV0jXCoDwNJtNLSws6Pz58xoeHtb4+LhqtVrRw0ICBHIAajabGhsb0+LiolZWVlSv1zU6OqrTp0+XJpiX+YuKQA5ACwsLWlxc1KVLlyRJly5d0uLiohYWFjQxMVHw6AZX9i8qauQAdP78ea2srGy5bmVlRcvLy8UMKGWbv6jcfcsXVRkQyAFoeHhY9Xp9y3X1el0HDhwoZkApK/sXFYEcgMbHxzU6OqpGoyEzU6PR0OjoqMbHx4seWirK/kVFjRyAarWaTp8+rYWFBS0vL+vAgQOlmgxsfVG118jL8kVlRezZOTIy4iyaBSBPra6VmL+ozOysu49su55ADgBx6BbIqZEDQOQI5AAQOQI5AESOQA4AkSOQA0DkCOQAEDkCOQBEjkAOAJEjkANA5AjkABA5Fs0CKqTMu+RUGYEcmSFohKXsu+RUGYEcmSBohKfs27lVGTVyZKLsW2vFqOy75FQZgRyZIGiEp+y75FQZgRyZIGiEp+zbuVVZajVyM6tJWpL0hrtTcKu4sm+tFaOyb+dWZantEGRmD0kakfSunQI5OwRVQxm21kKYqtoR1W2HoFQycjO7UdLHJf2FpIfSuE3Er1araWJigo4IpIqOqO3SqpF/VdIXJf2m2y+Y2YNmtmRmSxcvXkzpbgH0q9lsan5+XtPT05qfn1ez2Rzo9/JCR9R2A2fkZjYh6U13P2tmH+n2e+5+StIpaaO0Muj9Auhf0qw2xOz3Wh1RVT36SyMjPyzpXjN7VdK3JB0xs2+mcLsAMpI0qw0x+6UjaruBA7m7P+zuN7r7PkmfkvRP7n5s4JEByEzSPv8QzwegjXI7TtEHKqiV1bZO15c6Z7VJfy9PtFFul1r7YS9oPwSKFXONvMq6tR8SyJGLqvb9hixpnz/nA4SDQI7CkNUB6egWyFlrBZkLsfMBKBMCeWBCO/kiDSF2PgBlQtdKQMpaggix8wGDYc4jLJUL5CG/Acu6g0teKyGG/NqWSVkTjphVKpCH/gbsVoI4d+7c1f+PMUDl0fcb+mtbJmVNOKLm7rlfbr/9di/C3NycNxoNl3T10mg0fG5urpDxtOs0vnq97vv37/dGo+Fm5o1Gw48ePerr6+tFDzcoob+2ZTI1NeVmtuW5NjOfnp4uemilJ2nJO8TUSk12hj7p1unU4w984AO6cOHClo6PH/zgB3rggQdKMxmahtBf2zLptNbJrl27tLa2xvuxIJUK5KEvttMqQczMzGhqakozMzP6xCc+sS1Ara6u6umnn9bk5KTGxsb48Cj817ZMWgnH5ud7bW1NTzzxhO666y7ej0XolKZnfSmqtLK+vu5Hjx6NqkzRqWSggsoH6+vrPjc351NTUz43NxfU8xbjaxuz9fV1P3HixLYSS61W89nZ2aKHV1rqUlqp1GRnjIvtbO742Ny+15LXOsyhTybG+NrGrFar6dVXX5W3nRnebDb17W9/W/fdd1+i26HTKCWdonvWl6Iy8li1MuFjx4750NBQIRk5k4lo9+lPf7rjUeKxY8cS/T1HUb0Tk53xau19+Y1vfEOHDx8uZB3mficTy3imKjZ88pOf3JY912o13X///Yn+PoalG2J5/1aqtBK7QcoHgx7C3nbbbbruuuu2vJGvu+463Xrrrde8z5DLMaELvewwMTGhO++8U88995xWV1c1NDSkQ4cOJS7zhb5lW1Tv305petYXSivpu9ZEZKdD2CNHjvjs7GziicvZ2Vmv1Wo9TWxRjulfLGWHy5cv+6OPPupHjhzxRx991C9fvpz4b0N/f4Q4PnUprRDIS2CnD32nN2StVvOhoaHEQWJqaqpjPfRaJ4Fw4kj/Qgwi7Qb9sgn9yyrE92+3QE6NvAR2qjV2OoRtNptaXV1NXJscHh5Wo9HYcl2j0bhmnza93f2L4QSnQWvcnc6bGLRskWZNO6b3L4G8BHb60Hd6Q7bbKUj0s+Etm+T2L4QgslNQTOPLpjWR/8gjj2hiYmLgID42NqbJyUmdPHly4BPmonr/dkrTs75QWknXTofh7YewQ0ND2+rdSQ7bW3X46enpxCcE9fM3KL7skOT+Qyv/ZDGe0N6/okZeXkk+dJvfkLOzs37kyJEtgf3mm2/22dnZwt+oeFuRQSRJUCz6y6ZdiDXttBHIS67XD/36+rrPzs76zTfffPUko6I/iAhH0qAYUsYa2hFCFroFcvrIS6JVa0zaf1ur1VSr1fTmm29qdXVVUnrrSofe/4ydJd3Vqdf3XZby2sAkRATyCsvihIyoTqJAV3kGxbS++Ku83g6BvMKy2EuT3WPKIa+gmPYXf0hHCHmi/bDCsmiviqH/Gcmk2RrYTQzrrcSAjLzCssi6ssjyUV6hr7cSCwJ5xaV9KFrlCSf0ji/+dNhGR0u+RkZGfGlpKff7RT5ak1fLy8tXV0d88cUX6WDBNkyO98bMzrr7yLbrCeTISt4fUtoe47T5i79KnSb9IJAjd/Pz85qcnNxy2NxoNDQzM5N6/ZPMDlXQLZDTtYLM5NnBQvcDqoxAjszkuYIfbY+osoEDuZndZGbfN7OXzewlM/t8GgND/PJcBjSEZV+BogxcIzez90h6j7ufM7N3Sjor6Q/c/T+6/Q018urIayKLGnk4mHTOTm6TnWb2HUlfc/dnu/0OgRxZoPuheHyhZiuXQG5m+yT9s6QPuvuv2v7vQUkPStL73ve+21977bXU7hdAGPLsVKqizLtWzKwh6e8lHW8P4pLk7qfcfcTdR/bu3ZvW3QIICJPOxUjlFH0z26WNIP60uz+Txm1iO2qPCB2n3Bdj4EBuZibpryW97O5PDj4kdELtETFgrZ1ipJGRH5b0GUn/ZmbLV677srt/N4XbxhWs840YVHlzhyINHMjd/V8kWQpjwTWw3CdiUdXNHYrEMraRoPaImHWa35HEnE9KCOQFSzqBSe0Rseo0v3Pw4EFJ0g9/+EPmfFJAIC9QLxOY1B4Rq07zO88995wkaXV19ep1zPn0j0WzCtTrin157KEIpK3T/M7q6urVIN5Cv3n/COQF4uQJVEGnBc2GhoY0NDS05TrmfPpHIC8QK/ahCjqtgnno0CEdOnQol5Uxq4AaeYGYwEQVdJvfkcScT0rY6q1grNgHICn27ASAyHUL5JRWAKCDmBapI5ADQJvYFqmjawUA2vR6jkfRCOQA0Ca2czwI5ADQJrZzPAjkANCm00lMIZ/jwWQnALSJbZE6+sgBIBL0kSOqvlgAyRHIKyK2vlgAyTHZWRGx9cUCSI5AXhGx9cUCSI5AXhGx9cUCSI5AXhGx9cUCSI7JzoqIrS8WQHL0kQNAJLr1kVNaAYDIEcgBIHIEcgCIHIEcACJHIAeAyBHIASByBHIAiByBHAAil0ogN7O7zezHZvaKmX0pjdsEACQzcCA3s5qkr0sal3SLpEkzu2XQ2wUAJJNGRn5Q0ivu/hN3X5P0LUn3pXC7AIAE0gjk75X0s00/v37lui3M7EEzWzKzpYsXL6ZwtwAAKZ1Abh2u27YSl7ufcvcRdx/Zu3dvCncLAJDSCeSvS7pp0883Svp5CrcLAEggjUD+I0m/a2bvN7Pdkj4l6R9TuF0AQAIDbyzh7utm9jlJpyXVJP2Nu7808MgAAImkskOQu39X0nfTuC0AQG84sxMAIseenUBBms2mFhYWdP78eQ0PD7OHKvpGIAcK0Gw2NTY2psXFRa2srKher2t0dFSnT58mmKNnlFaAAiwsLGhxcVGXLl2Su+vSpUtaXFzUwsJC0UNDhAjkQAHOnz+vlZWVLdetrKxoeXm5mAEhagRyoADDw8Oq1+tbrqvX6zpw4EAxA0LUCORAAcbHxzU6OqpGoyEzU6PR0OjoqMbHx4seGiLEZCdQgFqtptOnT2thYUHLy8s6cOAAXSvom7lvW98qcyMjI760tJT7/QLAoIpsGzWzs+4+0n49GTkAJBRq2yg1cgBIKNS2UQI5ACQUatsogRwAEgq1bZRADgAJhdo2ymQnACQUatso7YcAkIM02hZpPwSAgmTdtkiNHAAylnXbIoEcADKWddsigRwAMpZ12yKBHAAylnXbIpOdAJCxrNsWaT8EgEh0az+ktAIAkSOQA0DkCOQAEDkCOQBEjq6VgBS5hRSAeBHIAxHqFlIAugsl+SKQB2LzWgyStqzFMDExUfDoALQLKfmiRh6IULeQAtBZSPt3EsgDEeoWUsBmzWZT8/Pzmp6e1vz8vJrNZtFDKkxIyRellUC01mJoP0wregspoCWkUkIIWslXqxwqFZd8DRTIzewJSb8vaU3SBUl/5O7/m8K4KifvLaRCmaRBPJjH2Sqk5GvQjPxZSQ+7+7qZPS7pYUl/NviwqqlWq2liYiLzDwWZFfpxrVJCt/dsmROGkPbvHCiQu/uZTT8+L+n+wYaDPJBZoR+9lhKqkDDklXztJM3Jzs9K6jpda2YPmtmSmS1dvHgxxbtFN90mpkKapEE8el1TO6SujrLbMSM3s+9JeneH/zrh7t+58jsnJK1Lerrb7bj7KUmnpI1lbPsabUbKePh3rWwopEkaxKPXUkI/pRj0yd0Hukh6QNK/Sro+6d/cfvvtHor19XU/evSoNxoNNzNvNBp+9OhRX19fL3poA5mbm/NGo+GSrl4ajYbPzc2V9jEjLNd6D6I/kpa8Q0wdtGvlbm1Mbt7p7m8NcltFKaJe3O8RQC9/t1M2FMokDcorpK6Oshu0a+Vrkn5L0rNmJknPu/sfDzyqHOV9+NfvBFCvf7dT+SSUSRqUV0hdHaXXKU3P+hJSaSXvw79+76/Xv6N8ApSPupRWKn+Kfta7W7e0OkieeuqpLVmylKxjpNdOk1Y2NDMzo6mpKc3MzJSq7QvA2yp/in4eh3/tZZF2STpG+uk0oXwCVINtZOv5GhkZ8aWlpdzvtyjz8/OanJzclolLunoEkHaNHED5mNlZdx9pv77yGXkeOpVFzExHjhzR8ePHEx0BMHGEkJTx3Iu0FPHcEMhz0K0scvz48Z7KHpRKEAKODrsr6rmp/GRnHvKaUAXykNap92Vc27yoZQnIyHNAWQRlksa5F2XN6otaloBAnpNeyiLUHxGyNNbqKesKnEWtY0RpJTCtTGVyclInT57U5OSkxsbGSnHYiXJIo1RY1hU4iyqjkpEHpqyZCsojjVJhWVfgLKqMWvpAHluZgqU/EYNBO6jKvKBWEd1lpQ7kMU6olDVTATajASBdpT6zs9MZlY1GQzMzM5mtbDho9h/jlw/KL7Yj27Kq5JmdeZYp0grAZCoIDclF+EodyPMsU6Q5SZlWja09i7rrrrt05swZsir0hAn48JU6kOc5oRLaJGV7FnX99ddr9+7d+vWvf01WhZ6E9t7GdqUO5HmWKTpl/9dff31hk5TtWdTKysqWDyNZFTa7Vg081Al46vabdNptIutLSDsEpeXy5cu+Z8+eLTv47Nmzx9966y2fm5vzqampqxsf52FqasrNbMt42i9m5tPT07mMB+HaaTepEHebCnFMeVAWmy/jbWfOnNHa2tqW6y5fvqwPf/jDunDhQu7ljE5ZVLsQsqqyiyFr3KkGPuiRbRbPAXX7Np2ie9aXMmbk3TLg3bt357Yf6GbtGUu9Xvc9e/Z4vV6vVAZTpFiyxk7v3bSO1rJ6DrIcc8gUe0YeembTKQNuTS5ultckUacsqtW1QltjPmLJGrOsgSd9Dnr9fIdaty9Mp+ie9aXXjDyGzKbTGPfv3++NRvKd71EusWSNWX6+kjwH/dx/DDEhC4o5I48hs+mWAd9zzz1BricR+hFOGcSSNWbZ3ZXkOejn882Jc206RfesL71m5LFkNp2sr6/73NycT09P59q1stOYqpjN5I3nOdlzEPPnO2+KOSOPJbPpJO2V0NLIpGM4wikDssZkz0HMn+9gdIruWV/KWCPPQ1rPAxkQQtKpw2r//v3+la98JZij2FAo5oyczGZDWpk0GRBCsvnzfe7cOT3zzDO6cOGCHnvsMZaSSCiard5aJYpHHnnk6kkKVZPW9lhFbUcFdNP6fH/oQx/ShQsXct+FPnZRZOTYkFYmzREOQsUCXf0hkEckzdUci9iOCthJp2Rl165dWltbU7PZJNnootQ7BJVRq2uFTBpl1Fp++fnnn9+Smdfrdd1xxx2Vr5V32yGIQA4gKM1mU4899pgef/zxLQvRZblNYywqudVbmjgTsnx4TcNUq9W0a9euntYpqvprmUogN7MvSHpC0l53/2UatxkS9iwsH17TsPVSK+e1TKH90MxukvQxST8dfDhh2ty/TUtUOYT+mjabTc3Pz2t6elrz8/NqNptFDylXrYn9er1+9bq1tTU9+eSTGhsb2/J8hP5a5iGNPvKnJH1RG2cIllJa/dsIR8ivaSvDnJyc1MmTJzU5ObkteJVdq0X2oYce0u7du69ev7Kysi1Ih/xa5mWgQG5m90p6w91fSPC7D5rZkpktXbx4cZC7zV3rMG8zzoSMW8ivKRnmhp1q5S0hv5Z52TGQm9n3zOzfO1zuk3RC0p8nuSN3P+XuI+4+snfv3kHHnSvOhCyfkF9TMsy3JQnSIb+WedlxstPdP9rpejO7VdL7Jb1gZpJ0o6RzZnbQ3X+R6igLxpmQG8rUGRDya8paOG9LchJcyK9lXlLrIzezVyWNJOlaoY88PnQG5IfneitOgntb5icEEcjLbX5+XpOTk1uyRE7QyA7BC51kfkKQu+9L67YQHhYzyhdr4aAX0Sxji2LRGQCEi0COROgMAMLFWitIhM4AIFysfggAkeg22UlpBQAiRyAHgMgRyAEgcgRyAIgcgRwAIldI14qZXZT0Wu53nJ8bJJVup6RrqNLjrdJjlXi8ofkdd9+2fGwhgbzszGypU4tQWVXp8VbpsUo83lhQWgGAyBHIASByBPJsnCp6ADmr0uOt0mOVeLxRoEYOAJEjIweAyBHIASByBPKMmdkXzMzN7Iaix5IlM3vCzP7TzF40s38ws98uekxpM7O7zezHZvaKmX2p6PFkycxuMrPvm9nLZvaSmX2+6DFlzcxqZnbezOaLHkuvCOQZMrObJH1M0k+LHksOnpX0QXe/TdJ/SXq44PGkysxqkr4uaVzSLZImzeyWYkeVqXVJf+ruvyfpDkl/UvLHK0mfl/Ry0YPoB4E8W09J+qKk0s8ou/sZd1+/8uPzkm4scjwZOCjpFXf/ibuvSfqWpPsKHlNm3P2/3f3clX//nzYC3HuLHVV2zOxGSR+X9FdFj6UfBPKMmNm9kt5w9xeKHksBPitpoehBpOy9kn626efXVeLAtpmZ7ZM0LGmx4KFk6avaSLp+U/A4+sJWbwMws+9JeneH/zoh6cuS7sp3RNm61uN19+9c+Z0T2jgsfzrPseXAOlxX+iMtM2tI+ntJx939V0WPJwtmNiHpTXc/a2YfKXg4fSGQD8DdP9rpejO7VdL7Jb1gZtJGmeGcmR1091/kOMRUdXu8LWb2gKQJSUe9fCcovC7ppk0/3yjp5wWNJRdmtksbQfxpd3+m6PFk6LCke83sHklDkt5lZt9092MFjysxTgjKgZm9KmnE3UNeVW0gZna3pCcl3enuF4seT9rM7B3amMQ9KukNST+S9Ifu/lKhA8uIbWQgfyvpf9z9eMHDyc2VjPwL7j5R8FB6Qo0cafmapHdKetbMls3sL4seUJquTOR+TtJpbUz8/V1Zg/gVhyV9RtKRK6/n8pWMFQEiIweAyJGRA0DkCOQAEDkCOQBEjkAOAJEjkANA5AjkABA5AjkARO7/Ab4tr5poT74rAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# zly_test\n",
    "dataMat_tmp=np.array(dataMat)\n",
    "plt.scatter(np.transpose(dataMat_tmp)[0], np.transpose(dataMat_tmp)[1],marker='.',color='k',s=100)     \n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[-1.3250225 , -1.22214647],\n",
       "        [ 4.27220441,  1.25141569]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randCent(dataMat, 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "必选:\n",
    "dataMat:数据集\n",
    "k:簇数\n",
    "非必选:\n",
    "distMeas=distEclud ：距离(r)\n",
    "createCent=randCent：创建初始质心的函数\n",
    "返回:\n",
    "centroids:质心\n",
    "clusterAssment:\n",
    "clusterAssment包含两个列:\n",
    "第一列记录簇索引值,\n",
    "第二列存储误差(误差是指当前点到簇质心的距离,后面会使用该误差来评价聚类的效果)\n",
    "\"\"\"\n",
    "def kMeans(dataMat, k, distMeas=distEclud, createCent=randCent):\n",
    "    # 获取样本数和特征数（80,2）\n",
    "    m, n = shape(dataMat)\n",
    "    # 初始化一个矩阵来存储每个点的簇分配结果\n",
    "    # clusterAssment包含两个列:\n",
    "    #一列记录簇索引值,\n",
    "    #第二列存储误差(误差是指当前点到簇质心的距离,后面会使用该误差来评价聚类的效果)\n",
    "    clusterAssment = mat(zeros((m, 2)))\n",
    "    \n",
    "    # 创建质心,随机K个质心\n",
    "    centroids = createCent(dataMat, k)\n",
    "    # 初始化标志变量,用于判断迭代是否继续,如果True,则继续迭代\n",
    "    clusterChanged = True\n",
    "    zly=0\n",
    "    while clusterChanged:\n",
    "        clusterChanged = False\n",
    "        # 遍历所有数据找到距离每个点最近的质心,\n",
    "        # 可以通过对每个点遍历所有质心并计算点到每个质心的距离来完成\n",
    "        for i in range(m):\n",
    "            minDist = inf#最小距离设置为无穷大\n",
    "            minIndex = -1#最小质心的index(索引)设置为-1\n",
    "            for j in range(k):\n",
    "                # 计算数据点到质心的距离\n",
    "                # 计算距离是使用distMeas参数给出的距离公式,默认距离函数是distEclud\n",
    "                distJI = distMeas(centroids[j, :], dataMat[i, :])\n",
    "                # 如果距离比minDist(最小距离)还小,更新minDist(最小距离)和最小质心的index(索引)\n",
    "                if distJI < minDist:\n",
    "                    minDist = distJI\n",
    "                    minIndex = j\n",
    "                    zly=zly+1\n",
    "            # 如果任一点的簇分配结果发生改变,则更新clusterChanged标志\n",
    "            if clusterAssment[i, 0] != minIndex: clusterChanged = True\n",
    "            # 更新簇分配结果为最小质心的index(索引),minDist(最小距离)的平方\n",
    "            #更新属于哪个簇，离那个簇距离的平方是多少\n",
    "            clusterAssment[i, :] = minIndex, minDist ** 2\n",
    "        # print(centroids)\n",
    "        # 遍历所有质心并更新它们的取值\n",
    "#         print(clusterAssment)\n",
    "        for cent in range(k):\n",
    "            # 通过数据过滤来获得给定簇的所有点\n",
    "            \"\"\"\n",
    "            nonzero(clusterAssment[:, 0].A == cent)\n",
    "            nonzero函数是numpy中用于得到数组array中非零元素的位置（数组索引）的函数\n",
    "            [0]表示行索引\n",
    "            [1]表示列索引\n",
    "            \"\"\"\n",
    "            ptsInClust = dataMat[nonzero(clusterAssment[:, 0].A == cent)[0]]\n",
    "#             print(ptsInClust)\n",
    "            # 计算所有点的均值,axis=0表示沿矩阵的列方向进行均值计算\n",
    "            centroids[cent, :] = mean(ptsInClust, axis=0)\n",
    "        \n",
    "    print('zly:',zly)\n",
    "    #返回所有的类质心与点分配结果\n",
    "    return centroids, clusterAssment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "zly: 1560\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\anaconda\\lib\\site-packages\\numpy\\matrixlib\\defmatrix.py:445: RuntimeWarning: Mean of empty slice.\n",
      "  return N.ndarray.mean(self, axis, dtype, out, keepdims=True)._collapse(axis)\n",
      "D:\\anaconda\\lib\\site-packages\\numpy\\core\\_methods.py:180: RuntimeWarning: invalid value encountered in true_divide\n",
      "  ret = um.true_divide(\n"
     ]
    }
   ],
   "source": [
    "myCentroids,clustAssing=kMeans(dataMat, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "matrix([[ 0.33258533, -3.763162  ],\n",
       "        [        nan,         nan],\n",
       "        [-3.17006745,  2.60393509],\n",
       "        [ 0.972564  ,  2.924086  ],\n",
       "        [-1.595569  ,  3.01158056],\n",
       "        [ 2.5788746 ,  1.1559532 ],\n",
       "        [-3.5980785 , -3.32781167],\n",
       "        [-3.43972814, -2.14989743],\n",
       "        [ 3.17437012, -2.75441347],\n",
       "        [ 2.624524  ,  3.53268467]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "myCentroids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "test\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-9-4a56a5a4560f>:3: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
      "  label_test=np.array(clustAssing[:,0],dtype = np.int)\n"
     ]
    }
   ],
   "source": [
    "#自己做的测试画图\n",
    "print('test')\n",
    "label_test=np.array(clustAssing[:,0],dtype = np.int)\n",
    "dataMat\n",
    "clustAssing.shape\n",
    "def showDataSet(datamat,label_test,myCentroids):\n",
    "    \"\"\"\n",
    "    数据可视化\n",
    "    Parameters:\n",
    "        dataMat - 数据矩阵\n",
    "        labelMat - 数据标签\n",
    "        myCentroids - 质点\n",
    "    Returns:\n",
    "        无\n",
    "    \"\"\"\n",
    "    dataMat=np.array(datamat)\n",
    "    n = np.unique(label_test)\n",
    "    myCentroids_np=np.array(myCentroids)\n",
    "    last_01=[]\n",
    "    for j in range(len(n)):\n",
    "        tem=[]\n",
    "        for i in range(len(dataMat)):\n",
    "            if label_test[i]==j:tem.append(dataMat[i])\n",
    "        last_01.append(tem)\n",
    "    last=np.array(last_01)\n",
    "    print((last).shape)\n",
    "    for i in range(len(last)):\n",
    "        plt.scatter(np.transpose(last[i])[0], np.transpose(last[i])[1])\n",
    "   \n",
    "    plt.scatter(np.transpose(myCentroids_np)[0], np.transpose(myCentroids_np)[1],marker='+',color='k',s=100)     \n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(9,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-9-4a56a5a4560f>:25: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
      "  last=np.array(last_01)\n"
     ]
    },
    {
     "ename": "IndexError",
     "evalue": "index 0 is out of bounds for axis 0 with size 0",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-10-2f4b8d1b2c1b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mshowDataSet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataMat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabel_test\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmyCentroids\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mdataMat\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-9-4a56a5a4560f>\u001b[0m in \u001b[0;36mshowDataSet\u001b[1;34m(datamat, label_test, myCentroids)\u001b[0m\n\u001b[0;32m     26\u001b[0m     \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlast\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     27\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlast\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m         \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtranspose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlast\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtranspose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlast\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     29\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     30\u001b[0m     \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mscatter\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtranspose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmyCentroids_np\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtranspose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmyCentroids_np\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mmarker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'+'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'k'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m100\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASQklEQVR4nO3df5Dc9V3H8efLhA4hUlNLKD1IJtXBtBUhrSujVTvFplCunaRK/6DCMO10zDAjlepIBTOCDoODxhmro6Nm4o86JXScSukYsIRYaXUq1AuEEApRilJIqly1NLbNyK+3f9xGl2P3du++m9vb+HzM7Nx+v9/3fj/v27m9135/7H5TVUiS/n/7jlE3IEkaPcNAkmQYSJIMA0kShoEkCVg+6gbmctppp9W6detG3YYkjY29e/d+rapWz/dxSzoM1q1bx9TU1KjbkKSxkeSJhTzO3USSJMNAkmQYSJIwDCRJNAyDJDcm2Z9kX5LdSSa61Jyc5ItJHkzycJJfazKmJGn4mp5NtK2qfgUgyc8B1wNXzqr5b+AnquqbSU4C/j7JX1fVvQ3HljTL7Q8cYttdBzn8zFEmVq3gmovW8543nTnqtjQGGoVBVR3pmFwJvOwrUGvma1G/2Z48qX3zq1KlIbv9gUNcd9tDHH3uBQAOPXOU6257CMBAUF+NjxkkuSnJk8BlzGwZdKtZlmQf8DRwd1Xd13RcSS+17a6D/xsExxx97gW23XVwRB1pnPQNgyR7khzoctsMUFVbq2oNcAtwVbd1VNULVbUBOAs4P8k5c4y3JclUkqnp6ekF/VLS/0eHnzk6r/lSp767iapq44Dr2gncAdwwx7qeSXIP8E7gQI+a7cB2gFar5e4kaUATq1ZwqMs//olVK0bQjcZN07OJzu6Y3AQ82qVmdZJV7fsrgI3d6iQ1c81F61lx0rKXzFtx0jKuuWj9iDrSOGl6NtHNSdYDLwJP0D6TqH2K6Y6qmgReC3wsyTJmwucvqmpXw3ElzXLsILFnE2khspSvgdxqtcovqpOkwSXZW1Wt+T7OTyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgSaJhGCS5Mcn+JPuS7E4yMUftsiQPJNnVZExJ0vA13TLYVlXnVtUGYBdw/Ry1VwOPNBxPknQcNAqDqjrSMbkSqG51Sc4C3gXsaDKeJOn4WN50BUluAq4AvgFc0KPso8BHgFMHWN8WYAvA2rVrm7YnSRpA3y2DJHuSHOhy2wxQVVurag1wC3BVl8e/G3i6qvYO0lBVba+qVlW1Vq9ePc9fR5K0EH23DKpq44Dr2gncAdwwa/6PApuSTAInA69M8vGqunxenUqSjpumZxOd3TG5CXh0dk1VXVdVZ1XVOuBS4LMGgSQtLU3PJrq5vctoP3AhM2cMkWQiyZ2Nu5MkLYpGB5Cr6pIe8w8Dk13m3wPc02RMSdLw+QlkSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkmh42cskNwKbgReBp4H3ty95ObvuX4H/Al4Anq+qVpNxJUnD1XTLYFtVnVtVG4BdwPVz1F5QVRsMAklaehqFQVUd6ZhcCVSzdiRJo9BoNxFAkpuAK4BvABf0KCtgd5IC/qiqts+xvi3AFoC1a9c2bU+SNIBUzf1mPske4Iwui7ZW1ac76q4DTq6qG7qsY6KqDic5Hbgb+FBVfb5fc61Wq6ampvqVSZLakuxdyO74vlsGVbVxwHXtBO4AXhYGxw4qV9XTST4FnA/0DQNJ0uJodMwgydkdk5uAR7vUrExy6rH7wIXAgSbjSpKGq+kxg5uTrGfm1NIngCthZrcQsKOqJoHXAJ9Kcmy8nVX1mYbjSpKGqFEYVNUlPeYfBibb9x8HzmsyjiTp+PITyJIkw0CSZBhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSaBgGSW5Msj/JviS7k0z0qFuV5JNJHk3ySJIfaTKuJGm4mm4ZbKuqc6tqA7ALuL5H3e8An6mq1wPnAY80HFeSNETLmzy4qo50TK4EanZNklcCbwXe337Ms8CzTcaVJA1X42MGSW5K8iRwGd23DL4HmAb+NMkDSXYkWTnH+rYkmUoyNT093bQ9SdIA+oZBkj1JDnS5bQaoqq1VtQa4BbiqyyqWA28G/qCq3gR8C7i213hVtb2qWlXVWr169YJ+KUnS/PTdTVRVGwdc107gDuCGWfOfAp6qqvva059kjjCQJC2+pmcTnd0xuQl4dHZNVf0b8GSS9e1Zbwe+1GRcSdJwNTqADNzc/if/IvAEcCVA+xTTHVU12a77EHBLklcAjwMfaDiuJGmImp5NdEmP+YeByY7pfUCryViSpOPHTyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CSRMMwSHJjkv1J9iXZ3b728eya9e3lx25Hkny4ybiSpOFqumWwrarOraoNwC7g+tkFVXWwqja0a34Q+DbwqYbjSpKGqFEYVNWRjsmVQPV5yNuBL1fVE03GlSQN1/KmK0hyE3AF8A3ggj7llwK39lnfFmALwNq1a5u2J0kaQKrmfjOfZA9wRpdFW6vq0x111wEnV9UNPdbzCuAw8P1V9e+DNNdqtWpqamqQUkkSkGRvVbXm+7i+WwZVtXHAde0E7gC6hgFwMXD/oEEgSVo8Tc8mOrtjchPw6Bzl76PPLiJJ0mg0PZvo5iQHkuwHLgSuBkgykeTOY0VJTgHeAdzWcDxJ0nHQ6AByVV3SY/5hYLJj+tvAq5uMJUk6fvwEsiTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEk0DIMkNybZn2Rfkt1JJnrU/XySh9vXS741yclNxpUkDVfTLYNtVXVuVW0AdgHXzy5Icibwc0Crqs4BlgGXNhxXkjREjcKgqo50TK4EqkfpcmBFkuXAKcDhJuNKkoZredMVJLkJuAL4BnDB7OVVdSjJbwFfAY4Cu6tqd9NxJUnD03fLIMme9r7+2bfNAFW1tarWALcAV3V5/KuAzcDrgAlgZZLL5xhvS5KpJFPT09ML/b0kSfPQd8ugqjYOuK6dwB3ADbPmbwT+paqmAZLcBrwF+HiP8bYD2wFarVav3U6SpCFqejbR2R2Tm4BHu5R9BfjhJKckCfB24JEm40qShqvp2UQ3t3cZ7QcuBK4GSDKR5E6AqroP+CRwP/BQe8ztDceVJA1RqpbunphWq1VTU1OjbkOSxkaSvVXVmu/j/ASyJMkwkCQZBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQahkGSG5PsT7Ivye4kEz3qrk5yIMnDST7cZExJ0vA13TLYVlXnVtUGYBdw/eyCJOcAPwOcD5wHvDvJ2Q3HlSQNUaMwqKojHZMrgepS9gbg3qr6dlU9D3wO+Mkm40qShqvxMYMkNyV5EriMLlsGwAHgrUleneQUYBJYM8f6tiSZSjI1PT3dtD1J0gBS1e3NfEdBsgc4o8uirVX16Y6664CTq+qGLuv4IPCzwDeBLwFHq+rn+zXXarVqamqqX5kkqS3J3qpqzfdxy/sVVNXGAde1E7gDeFkYVNUfA38MkOTXgafm0aMk6ThrejZR54HgTcCjPepOb/9cC/wUcGuTcSVJw9V3y6CPm5OsB14EngCuBGifYrqjqibbdX+Z5NXAc8DPVtXXG44rSRqiRmFQVZf0mH+YmQPFx6Z/vMk4kqTjy08gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJ5l9HseTc/sAhtt11kMPPHGVi1QquuWg973nTmaNuS5KWtBMqDG5/4BDX3fYQR597AYBDzxzlutseAjAQJGkOJ9Ruom13HfzfIDjm6HMvsO2ugyPqSJLGwwkVBoefOTqv+ZKkGSdUGEysWjGv+ZKkGSdUGFxz0XpWnLTsJfNWnLSMay5aP6KOJGk8nFAHkI8dJPZsIkmanxMqDGAmEPznL0nzc0LtJpIkLcxQwiDJLyapJKf1WP7OJAeTPJbk2mGMKUkansZhkGQN8A7gKz2WLwN+H7gYeCPwviRvbDquJGl4hrFl8NvAR4Dqsfx84LGqeryqngU+AWwewriSpCFpFAZJNgGHqurBOcrOBJ7smH6qPa/XOrckmUoyNT093aQ9SdKA+p5NlGQPcEaXRVuBXwYu7LeKLvN6bUVQVduB7QCtVqtnnSRpePqGQVVt7DY/yQ8ArwMeTAJwFnB/kvOr6t86Sp8C1nRMnwUcXnDHkqShW/DnDKrqIeD0Y9NJ/hVoVdXXZpX+I3B2ktcBh4BLgZ9e6LiSpOE7Lh86SzIB7Kiqyap6PslVwF3AMuBPqurh4zGuJI2zUV6PZWhhUFXrOu4fBiY7pu8E7hzWWJJ0ohn19Vj8BLIkLQGjvh6LYSBJS8Cor8diGEjSEjDq67EYBpK0BIz6eiwn3FdYS9I4GvX1WAwDSVoiRnk9FncTSZIMA0mSYSBJwjCQJGEYSJKAVC3dSwYkmQaemOfDTgNmf3PqOBjHvsexZ7DvxTaOfY9jzzDT98qqWj3fBy7pMFiIJFNV1Rp1H/M1jn2PY89g34ttHPsex56hWd/uJpIkGQaSpBMzDLaPuoEFGse+x7FnsO/FNo59j2PP0KDvE+6YgSRp/k7ELQNJ0jwZBpKk8Q+DJN+d5O4k/9z++ao5apcleSDJrsXssUcvfftOsibJ3yZ5JMnDSa4eUa/vTHIwyWNJru2yPEl+t718f5I3j6LP2Qbo+7J2v/uTfCHJeaPoc1ZPc/bcUfdDSV5I8t7F7K+XQfpO8rYk+9p/y59b7B67GeBv5LuS/FWSB9t9f2AUfc7q6U+SPJ3kQI/lC3s9VtVY34DfBK5t378W+I05an8B2AnsGoe+gdcCb27fPxX4J+CNi9znMuDLwPcArwAenN0DMAn8NRDgh4H7lsDzO0jfbwFe1b5/8aj7HqTnjrrPAncC7x2T53oV8CVgbXv69DHp+5ePvTaB1cB/Aq8Ycd9vBd4MHOixfEGvx7HfMgA2Ax9r3/8Y8J5uRUnOAt4F7Fictvrq23dVfbWq7m/f/y/gEWCxv+z8fOCxqnq8qp4FPsFM7502A39eM+4FViV57SL3OVvfvqvqC1X19fbkvcBZi9zjbIM81wAfAv4SeHoxm5vDIH3/NHBbVX0FoKqWQu+D9F3AqUkCfCczYfD84rY5q6Gqz7f76GVBr8cTIQxeU1VfhZl/nsDpPeo+CnwEeHGR+upn0L4BSLIOeBNw3/Fv7SXOBJ7smH6KlwfSIDWLbb49fZCZd1Oj1LfnJGcCPwn84SL21c8gz/X3Aa9Kck+SvUmuWLTuehuk798D3gAcBh4Crq6qpfI/pJcFvR7H4kpnSfYAZ3RZtHXAx78beLqq9iZ52xBb6zduo7471vOdzLwT/HBVHRlGb/MZvsu82ecjD1Kz2AbuKckFzITBjx3XjvobpOePAr9UVS/MvFldEgbpeznwg8DbgRXAPyS5t6r+6Xg3N4dB+r4I2Af8BPC9wN1J/m4Er8P5WNDrcSzCoKo29lqW5N+TvLaqvtreFOq2+fmjwKYkk8DJwCuTfLyqLj9OLQND6ZskJzETBLdU1W3HqdW5PAWs6Zg+i5l3SfOtWWwD9ZTkXGZ2HV5cVf+xSL31MkjPLeAT7SA4DZhM8nxV3b4oHXY36N/I16rqW8C3knweOI+Z42CjMkjfHwBurpmd8Y8l+Rfg9cAXF6fFBVnY63GUB0KGdDBlGy89EPubferfxtI4gNy3b2YS/s+Bj46wz+XA48Dr+L+DbN8/q+ZdvPSA1ReXwPM7SN9rgceAt4y630F7nlX/ZyyNA8iDPNdvAP6mXXsKcAA4Zwz6/gPgV9v3XwMcAk5bAs/5OnofQF7Q63Gkv9CQnpRXt//I/rn987vb8yeAO7vUL5Uw6Ns3M7stCtjPzKbqPmByBL1OMvMO7svA1va8K4Er2/cD/H57+UNAa9TP74B97wC+3vHcTi31nmfVLokwGLRv4Bpmzig6wMwuzyXfd/v1uLv9d30AuHwJ9Hwr8FXgOWa2Aj44jNejX0chSTohziaSJDVkGEiSDANJkmEgScIwkCRhGEiSMAwkScD/AOM2eOFvI2itAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "showDataSet(dataMat, label_test,myCentroids)\n",
    "dataMat.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#二分k-means算法\n",
    "\"\"\"\n",
    "算法原理:\n",
    "\n",
    "由于传统的KMeans算法的聚类结果易受到初始聚类中心点选择的影响，\n",
    "因此在传统的KMeans算法的基础上进行算法改进，对初始中心点选取比较严格，\n",
    "各中心点的距离较远，这就避免了初始聚类中心会选到一个类上，\n",
    "一定程度上克服了算法陷入局部最优状态。\n",
    "二分KMeans(Bisecting KMeans)算法的主要思想是：首先将所有点作为一个簇，\n",
    "然后将该簇一分为二。之后选择能最大限度降低聚类代价函数（也就是误差平方和）的簇划分为两个簇。\n",
    "以此进行下去，直到簇的数目等于用户给定的数目k为止。\n",
    "以上隐含的一个原则就是：因为聚类的误差平方和能够衡量聚类性能，\n",
    "该值越小表示数据点越接近于他们的质心，聚类效果就越好。\n",
    "所以我们就需要对误差平方和最大的簇进行再一次划分，\n",
    "因为误差平方和越大，表示该簇聚类效果越不好，\n",
    "越有可能是多个簇被当成了一个簇，所以我们首先需要对这个簇进行划分。\n",
    "\"\"\"\n",
    "\n",
    "def biKmeans(dataMat, k, distMeas=distEclud):\n",
    "    # 获取样本数和特征数（80,2）\n",
    "    m,n = shape(dataMat)\n",
    "    # 创建一个矩阵来存储数据集中每个点的簇分配结果及平方误差\n",
    "    clusterAssment = mat(zeros((m, 2)))\n",
    "    # 计算整个数据集的质心,并使用一个列表来保留所有的质心\n",
    "    centroid0 = mean(dataMat, axis=0).tolist()[0]\n",
    "    centList = [centroid0]\n",
    "    print('centroid0',centroid0)\n",
    "    print('centList',centList)\n",
    "    #遍历数据集中所有点来计算每个点到质心的误差值\n",
    "    for j in range(m):\n",
    "        clusterAssment[j, 1] = distMeas(mat(centroid0), dataMat[j, :]) ** 2\n",
    "    #对簇不停的进行划分,直到得到想要的簇数目为止\n",
    "    while (len(centList) < k):\n",
    "        # 初始化最小SSE为无穷大,用于比较划分前后的SSE\n",
    "        lowestSSE = inf\n",
    "        # 通过考察簇列表中的值来获得当前簇的数目,遍历所有的簇来决定最佳的簇进行划分\n",
    "        for i in range(len(centList)):\n",
    "            #对每一个簇,将该簇中的所有点堪称一个小的数据集\n",
    "            ptsInCurrCluster = dataMat[nonzero(clusterAssment[:, 0].A == i)[0], :]\n",
    "#             print('ptsInCurrCluster',ptsInCurrCluster)\n",
    "#             (80,2)\n",
    "#             print('shape',mat(ptsInCurrCluster).shape)\n",
    "            #将ptsInCurrCluster输入到函数kMeans中进行处理,k=2,\n",
    "            #kMeans会生成两个质心(簇),同时给出每个簇的误差值\n",
    "            \"\"\"\n",
    "            centroids：质心\n",
    "            clusterAssment：\n",
    "            clusterAssment包含两个列:\n",
    "            第一列记录簇索引值,\n",
    "            第二列存储误差(误差是指当前点到簇质心的距离,后面会使用该误差来评价聚类的效果)\n",
    "            \"\"\"\n",
    "            centroidMat, splitClustAss = kMeans(ptsInCurrCluster, 2, distMeas)\n",
    "            #将误差值与剩余数据集的误差之和作为本次划分的误差\n",
    "            #比如说之前有100个数据，我们二分后产生两个质点，产生的误差和赋值给sseSplit\n",
    "            sseSplit = sum(splitClustAss[:, 1])\n",
    "            #将没有拆分的数据点进行误差求和，第一次是全部参加，所以第一次的时候sseNotSplit为0\n",
    "            #第二次，会将第一次已经二分的簇中，其中（2个簇）一个最值得切分进行切分，\n",
    "            #分别计算切分的和不要切分的簇的误差和，以此类推\n",
    "            sseNotSplit = sum(clusterAssment[nonzero(clusterAssment[:, 0].A != i)[0], 1])\n",
    "            print('sseSplit, and notSplit: ', sseSplit, sseNotSplit)\n",
    "            # 如果本次划分的SSE值最小,则本次划分被保存\n",
    "            if (sseSplit + sseNotSplit) < lowestSSE:\n",
    "                #得到值得二分的簇，记录该簇的编号\n",
    "                bestCentToSplit = i\n",
    "                #得到已经二分的那个簇的两个新的质心\n",
    "                bestNewCents = centroidMat\n",
    "                #包含两个数据：1，数据属于哪个簇，2误差\n",
    "                bestClustAss = splitClustAss.copy()\n",
    "                #将最小误差和赋值，用于for循环后面的比较操作（该循环体的if）\n",
    "                lowestSSE = sseSplit + sseNotSplit\n",
    "        \"\"\"\n",
    "        zly：下面代码通过上文for循环已经得到最值得切分的簇，但是还没有更新簇列表和对应的数据集中的属性\n",
    "        以下两个print意思是：\n",
    "        第一个：打印当前质心的个数\n",
    "        第二个：哪个簇的误差最大（最值得去二分）\n",
    "        第三个：最值得去二分的那个簇有多少数据\n",
    "        \"\"\"\n",
    "        # 找出最好的簇分配结果\n",
    "        #更新簇分配！！！\n",
    "        # 调用kmeans函数并且指定簇数为2时,会得到两个编号分别为0和1的结果簇\n",
    "        #这句的意思是，将前面最值得切分的那个簇分成两个，将编号为1的那个簇的编号设置为新的编号（质心的长度的编号）\n",
    "        bestClustAss[nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList)\n",
    "        print('len(centList)',len(centList))\n",
    "        # 这句的意思是，将前面最值得切分的那个簇分成两个，将编号为0的那个簇的编号设置为我们找到的那个最值得切分的簇\n",
    "        bestClustAss[nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit\n",
    "        print('the bestCentToSplit is: ', bestCentToSplit)\n",
    "        print('the len of bestClustAss is: ', len(bestClustAss))\n",
    "        \"\"\"\n",
    "        更新我们的质心，以下代码表示：\n",
    "        第一行代码：将值得分的那个簇（对应原簇那个list中，对应为i编号的簇）的质点更新为切分好的两个新簇的第一个质点\n",
    "        第二行代码：append新簇的第二个质点\n",
    "        \"\"\"\n",
    "        # 更新质心列表！！！\n",
    "        # 更新原质心list中的第i个质心为使用二分kMeans后bestNewCents的第一个质心\n",
    "        centList[bestCentToSplit] = bestNewCents[0, :].tolist()[0]\n",
    "        # 添加bestNewCents的第二个质心\n",
    "        centList.append(bestNewCents[1, :].tolist()[0])\n",
    "        # 重新分配最好簇下的数据(质心)以及SSE\n",
    "        clusterAssment[nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss\n",
    "    return mat(centList), clusterAssment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "centroid0 [-0.10361321250000004, 0.05430119999999998]\n",
      "centList [[-0.10361321250000004, 0.05430119999999998]]\n",
      "zly: 305\n",
      "sseSplit, and notSplit:  1000.7486813637506 0.0\n",
      "len(centList) 1\n",
      "the bestCentToSplit is:  0\n",
      "the len of bestClustAss is:  80\n",
      "zly: 367\n",
      "sseSplit, and notSplit:  389.1319549760543 95.53689260458489\n",
      "zly: 52\n",
      "sseSplit, and notSplit:  49.27737255260635 905.2117887591658\n",
      "len(centList) 2\n",
      "the bestCentToSplit is:  0\n",
      "the len of bestClustAss is:  58\n",
      "zly: 173\n",
      "sseSplit, and notSplit:  47.927020072288855 378.35037583505795\n",
      "zly: 153\n",
      "sseSplit, and notSplit:  51.95480394942269 389.1319549760543\n",
      "zly: 154\n",
      "sseSplit, and notSplit:  34.79525289942992 201.8553643501661\n",
      "len(centList) 3\n",
      "the bestCentToSplit is:  2\n",
      "the len of bestClustAss is:  33\n",
      "zly: 214\n",
      "sseSplit, and notSplit:  47.927020072288855 130.33214550401482\n",
      "zly: 83\n",
      "sseSplit, and notSplit:  51.95480394942269 141.1137246450111\n",
      "zly: 52\n",
      "sseSplit, and notSplit:  10.419657126631476 219.94359137128788\n",
      "zly: 62\n",
      "sseSplit, and notSplit:  13.56032255505883 218.56239022847424\n",
      "len(centList) 4\n",
      "the bestCentToSplit is:  0\n",
      "the len of bestClustAss is:  25\n"
     ]
    }
   ],
   "source": [
    "myCentroids01,clustAssing01=biKmeans(dataMat,5, distMeas=distEclud)\n",
    "label_test_01=np.array(clustAssing01[:,0],dtype = np.int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(5,)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAYzUlEQVR4nO3df4ydVZ3H8c+3v2sLrQ0l/U2BAIrYBHYUkQiVakFLKTGk6qox8Q9issbCKm5ZFBt0IwnRUqLJhribkNgVu4B0azUUgdZ1+bFOW6hiAaEBYToNNd1WW1rotN/9485tZ26fe+e59z4/zvM871fStPPMnXvPnZl+7rnnfM855u4CABTXqLwbAADoDkEOAAVHkANAwRHkAFBwBDkAFNyYPB70jDPO8Pnz5+fx0ABQWFu3bv2Lu09vvJ5LkM+fP1+9vb15PDQAFJaZvRZ1naEVACg4ghwACo4gB4CCI8gBoOAIcgAoOIIcAAoul/JDAOF5eHuf7nrkRe3ef1izpk7ULVdfoOsvnp13sxADQQ5AD2/v060P/V6Hjx6TJPXtP6xbH/q9JJUmzMv8QsXQCgDd9ciLJ0K87vDRY7rrkRdzalGy6i9UffsPy3Xyherh7X15Ny0RBDkA7d5/uK3rRVP2FyqCHIBmTZ3Y1vWiKfsLFUEOQLdcfYEmjh097NrEsaN1y9UX5NSiZJX9hYogB6DrL56t733q/Zo9daJM0uypE/W9T72/NJOBZX+homoFgKRamJcluBvVn1dZq1YIcgCVUOYXKoZWAKDgCHIAKDiCHAAKjiAHgIIjyAGg4AhyACg4ghwACo4gB4CCI8gBoOBY2QlUSJkPV6gyghypeumZPXpq/Ss6uO9tTZ42XpctO1fnXzoj72ZVUhVOAaoqhlaQmpee2aMn1r6gg/veliQd3Pe2nlj7gl56Zk/OLaumsh+uUGUEOVLz1PpXNPDO8WHXBt45rqfWv5JTi6qt7IcrVBlBjtTUe+JxryNdZT9cocoIcqRm8rTxbV1Husp+uEKVJRbkZjbazLab2S+Suk8U22XLztWYccN/xcaMG6XLlp2bU4uqreynAFVZklUrKyTtlHR6gveJAqtXp1C1Eo4yHK5ACeWpEglyM5sjaYmkf5H0j0ncJ8rh/EtnENxIDCWU0ZLqkd8t6RuSTmt2AzO7UdKNkjRv3ryEHhZAN+L0bkPqAbcqoaxykHc9Rm5m10p60923trqdu9/r7j3u3jN9+vRuHxZAl+q92779h+U62bt9eHtfW7fJEiWU0ZKY7Lxc0nVm9qqk+yVdZWY/SeB+AaQozgKh0BYRUUIZresgd/db3X2Ou8+X9BlJj7v757tuGYBUxendhtYDpoQyGnXkQEXF6d2G1gOmhDJaoptmuftmSZuTvE8A6bjl6guGVYBIp/Zu49wma2UooUwaux8iU+yGGI56GLaqSIlzG+TP3D3zB+3p6fHe3t7MHxf5qu+GOHQjrTHjRumjn3sPYQ7EYGZb3b2n8Tpj5MgMuyEC6SDIA7Vx10YtfmCxFty3QIsfWKyNuzbm3aSusRsikA7GyAO0cddGrXpylY4cOyJJ6j/Ur1VPrpIkLTlnSY4t687kaeMjQ5vdEIsnpNWeqHCQb9y1UWu2rdGeQ3s0Y9IMrbhkRTAhuWbbmhMhXnfk2BGt2bYmmDZ24rJl50aOkSe5GyKTqeljv5PwVHJopd7j7T/UL5ef6PGGMnyx51D0UWj9h/oLPdxy/qUz9NHPvedED3zytPGJTnRytFw24q72XLhwoRYuXJhhy6qrkj3y0Hu8MybNUP+h/sjP1a8Xdbglzd0QW02m0itPTmirPVHRHnmzHm+z61lbcckKTRg9YcTbHTl2RCv/e2Uhe+dpYDI1G81WdY4yy20zraqrZJDPmBTdO2t2PWtLzlmiVR9epZmTZspkmjlpZsvbhzY0lBeOlstG1H4nknTMXbf853OEeQ4quSCosSpEkiaMnqBVH14V7DDF4gcWNx1uqZs5aaY23bAp1XaEPEnMgqPsPLy9Tzf/7FkNTY89/7FSkjRm1Cj1zH+3tmzZIkm68sorh33t5s2bM2pl+bAgaIioHm/IIS7FG25Je2go9EnitCdTcdL1F89Wsy7gwPHjTT5zqoe39+nyOx/X2Ss36vI7H6c336FK9siLqt4bbtYzT7tH3uxdQRbvBBCe+Subv4C/eueSExUrzXrgjWWMUm1DrlB2MwyxVr5Zj7ySVStFteScJVpyzpKmQ0MrLlmR6uO3KoscCfXd5fPud43V/711NPJ6HCEf21a0WvlKDq0UXTdDQ90s/W81Gdzqfqjv7lzIQw/fXvo+jR1tw66NHW369tL3xfr6kMsYQzsZaSQEeUEtOWeJNt2wSTu+uEObbth0IsRbBXXUGPc3f/tNfeT+j8QK9lY9/jXb1jT9HJtldSa08zIbXX/xbH36A3M12mphPtpMn/7A3Ng91tAOrRgq5BeZKAR5iYw0GRm1EGrAB7T/7f2xJi9b9fhbTbRS392Z0HuFD2/v04Nb+3RscJ7tmLse3Np34oVm8+bNLStUQj62LeQXmSgEeYm0WrEqxatqGXr7KM1q2lsNu1Df3ZnQe4XdvtAkfWxbksNQIb/IRGGys0SaBfWT33xSC3+4UDO+0nzpf5z7kWrDK+1OtKa1WdZIVRFFN2vqRPVFhHZWvcKRqjaSeKFJ6ti2pCcni3YyEkFeIs32aBk3epyk6BBudj/N1IdX2lkUVK9OoWqlPXmelxknGPN+oRkqjQqYIp0NSpCXSLPe8uzTar+MjSF8+rjT9dbAWzp6fHgJ2RVzrmj5OPUyyHakuVlWWeXZK4wTjCEdzBz6MFTaCPISadZbvuvHdw27zdAQ/u7T39XPXvzZsPtZ//J6XXzmxV2vdA15OX9R5NUrjBOMIQ0/hPTuIA+s7Cyhxj2gW+15kdZqzTT2s2nneaE7l9/5eGQwzp46Uf+z8qocWtRa6KtEk8JeK4iU1pa+I1XQIGxZVW0kVWmSdAVM0TC0UkKNPdJW1R3NJki73dI3jReIdp4XupPFsEkalSZVCe5GBHnFdVJOGEdaLxDITtrBGPJeK0XD0ErFpbWlb9S2u1ls7IXiqHqlSZLokaOjcsI49ym1V2+Oaql6pUmSqFpBJihFRKOqVJokif3IkZvGUsT65lxS6424OsG+58URUh160dEjR+qyOlmIMztRdtSRIzdp1ao3Yt9zVBVBjtQ1KzlMuhSRfc9RVV0HuZnNNbMnzGynmT1vZtSXYZisShHZ9xxVlcRk54Ckr7n7NjM7TdJWM3vU3f+YwH2jBLIqRUxr33O0J8TT58uu6yB3935J/YP//puZ7ZQ0WxJBjhPSqFVvxL7n+Sva6fNlkWjVipnNl/QbSRe5+18bPnejpBslad68eX/32muvJfa4AMJQtF0Tiyb1qhUzmyzpQUk3NYa4JLn7ve7e4+4906dPT+phAQSEZff5SCTIzWysaiG+1t0fSuI+0dzGXRu1+IHFWnDfAi1+YHHTU++BrBXt9PmySKJqxST9m6Sd7v6D7puEVuqrJPsP9cvlJ1ZJEuYIQdFOny+LJHrkl0v6gqSrzOzZwT+fTOB+EYEDGxCyqh/wkJckqlZ+K8kSaAtiyGqVJNCpqH3MKUlMF5tmFQwHNqBompUk9r62T0+8sJdwTwBL9AMRdwKTAxtQNM1OAlr79J/Vt/+wXCfDvdMzO6uOIA9AOxOYaZ3oA6SlWelh4wqW+jFvaB9DKwFoNYEZFdBZrJIEktLsJKAo1Jt3hh55AJjARJlFlSQ2q46g3rwzBHkAstrmFchDVEni5z40j3rzBDG0EoAVl6wYdhSaxAQmyiWqJLHnrGmUJCaEIA8AJ86jiqLCHZ0hyAPBBCaATjFGDgBRdqyTVl8krZpa+3vHurxb1BQ9cgBotGOdtOGr0tHBcsgDr9c+lqQFy/NrVxP0yAGg0WN3nAzxuqOHa9cDRJADQKMDb7R3PWcEOQA0mjKnves5I8gBoNGi26WxDatMx06sXQ8QQQ4AjRYsl5beI02ZK8lqfy+9J8iJTomqlcrZuGsjC4+AOBYsDza4GxHkFVLfLre+FUB9u1xJhDlQYAytVAjnfQLlRJBXCNvlAuVEkFcI2+UC5USQVwjnfQLlxGRnhbBdLlBOBHnFsF0uUD4MrQBAwRHkAFBwBDkAFBxBDgAFR5ADQMER5ABQcAQ5ABQcQQ4ABZdIkJvZNWb2opm9bGYrk7hPAEA8XQe5mY2W9CNJn5B0oaTPmtmF3d4vACCeJHrkH5T0srvvcvd3JN0vaVkC9wsAiCGJIJ8t6fUhH78xeA0AkIEkgtwirvkpNzK70cx6zax37969CTwsAEBKJsjfkDR3yMdzJO1uvJG73+vuPe7eM3369AQeFgAgJRPkv5N0npmdbWbjJH1G0n8lcL8AgBi63o/c3QfM7CuSHpE0WtK/u/vzXbcMABBLIgdLuPsvJf0yifsCALSHlZ0AUHAEOQAUHGd2Ajl66Zk9emr9Kzq4721NnjZely07V+dfOiPvZqFgCHIgJy89s0dPrH1BA+8clyQd3Pe2nlj7giQR5mgLQytATp5a/8qJEK8beOe4nlr/Sk4tQlER5EBODu57u63rQDMEOZCTydPGt3UdaIYgB3Jy2bJzNWbc8P+CY8aN0mXLzs2pRSgqJjuBnNQnNKlaQbcIciBH5186g+Aukh3rpMfukA68IU2ZIy26XVqwPO9WEeQAEMuOddKGr0pHD9c+PvB67WMp9zBnjBwA4njsjpMhXnf0cO16zghyAIjjwBvtXc8QQQ4AcUyZ0971DBHkABDHotulsROHXxs7sXY9ZwQ5AMSxYLm09B5pylxJVvt76T25T3RKVK0AQHwLlncW3CmXLRLkAJCmDMoWGVoBgDRlULZIkANAmjIoWyTIASBNGZQtEuQAkKYMyhYJcgBIUwZli1StAEDaOi1bjIkeOQAUHEEOAAVHkANAwRHkAFBwBHmADmzYoD9dtUg733uh/nTVIh3YsCHvJgFotGOdtPoiadXU2t871uXWFKpWAnNgwwb1f+t2+ZEjkqSB3bvV/61avemUpUvzbBqAusCOfaNHHpg3V999IsTr/MgRvbn67pxaBOAUgR37RpAHZqC/v63rQKYCGk7IVWDHvhHkgRkzc2Zb14HM1IcTDrwuyU8OJ1QxzAM79q2rIDezu8zsBTPbYWY/N7OpSTWsqs68+SbZhAnDrtmECTrz5ptyahEwKLDhhFwFduxbtz3yRyVd5O4LJL0k6dbum1RtU5Yu1czv3KExs2ZJZhoza5ZmfueO1CY6qZBBbJ0MJ5R1KCawY9+6qlpx901DPnxa0g3dNQdSLcyzqFChQgZtmTJncFgl4nqUwCo7Epfy/intSHKM/EuSftXsk2Z2o5n1mlnv3r17E3xYjKRZr5sKGbSl3eEEhmIyM2KP3Mx+LWlGxKduc/f1g7e5TdKApLXN7sfd75V0ryT19PR4R63NwIENG/Tm6rs10N+vMTNn6sybbyp077RVr5sKGbSl3vuMe4hwYJUdZTZikLv7x1p93sy+KOlaSYvcPdiAjqOMQw2tet1jZs7UwO7dp3wNFTJoqp3hhHaHYtCxbqtWrpH0T5Kuc/e3kmlSfrIeauh0orGdr2vV66ZCBqkKrLKjzLpdov9DSeMlPWpmkvS0u3+561blJMuhhk57/+1+Xated/32ZRpKQkDaHYpBxyyP0ZCenh7v7e3N/HFH8qerFkWH3qxZOu/xx4J4rHa/rjH4pVqvO82SRgDpMLOt7t7TeJ2VnUNkNdRwYMOGyDCWRu79t/uuIeu6dADZY/fDIbIYaqj3kJsZaaKxkwnKrOrSAeSDIG+QduhFTajWxen9n3nzTZFDJUxQAtVFkGes1dBJnCEPJigRhB3rmMRsJofvDUGesaZDI7NmxQ5jhkqQq7Ivve9GTt8bJjszRu02Ci+ppfdl3FArp20JCPKMNasiWfb972vhwoV5Nw8YWZtL7xcuXHjq73ZZ9zbPaVsCgjwHU5Yu1XmPP6b37vyjznv8sabDJGwxiyAlcahCWTfUyunACYI8UPUyxYHduyX3Eys4CXPkLoml92XdUCunbQkI8kCxxSyClcShCoEdlZaYnA6cqFTVSkhb1DaOGW7ZsmXY9bf+93eSXPfNO2vY7dhiFkFosQviSL/bkqRDrs3XTxw+vFKWDbVyOHCiMj3yog1V2PhxkdfZYhalMGl6UEelFV1lNs3KckOsTnr+9d7K5s2bT9wHm10hGF0scmn83Ubnmm2aVZmhlay2qE3qcApWcCIYLAAKXmWCPKvTcFpNUrYbwkmt4Gx8hzD5yit0cMtveIFAPK1KBQnyIFRmjDyrFZWhnYMZNTew/6f3F2auAAEoa6lgiVSmR57VUEWnPf+0xg9b7bZY1+k7BpRIqzHwLs/eTOV3m027hqlMkEvZbDY1+cortP+n959yfexZ82oTrhkPZ8R9J0BZY4WNNAa+6Pbhn5fyLRVkzP4UlRlaycrBLb+JvH746WdyGc6IOwdAWWOKQt8caqTl8t0scknjuZd1eX8XKtUjz0LTnm1DmWdWwxlRB1E0YvfFFBWh9xhnDLyTRS5pPXfG7E9RyB55yJtJtdOzzWI4I2q3xamf/QxneGalCL3HtJbLt/Pc2+m5l3V5fxcK1yNPqk47LXF6wHVZDWdwEEWOitB7TGsMPO5zb7fnHtqYfQAK1yMPfTOpZj3gEA+TCPmdTWkUofeY1kZPcZ97u+9actqYKmSF65GHVqcdJaoH/K5LLglqlWbo72xKoyi9xzQ2eor73Dt515LDxlQhK1yQZ7VCM2lJD290u5NjkitQ0UI9bKpY8xz3uXdZp44CBnnUGHQIwxRZSqI3XYR3NqVR5d5jnOce1XOXpHcO1cbPq/q9a0PhxsibnXlZpV5kEvMEzd7BhP7OBiVUH/OeOG349cP7ynGOZwYKF+RS/DMvyyqJ3nRWe88AsSxYLo2bdOr10Eo1A1W4oRUkM0/ANrkIThFKNQNFkBdQUvME1JcjKM0mPW0UY+UjKOTQStUxT4BSijqBXpL8GGPlI6jMUW8ACmDHOunnX66Fd6Mpc6Wb/5B9mwLS7Kg3euRtYjVkyYS+M2HVLFgu+fHoz7UaK6/4zzGRIDezr5uZm9kZSdxfqKJO2+F0nQKr7/Fx4HVJfnKPj1BCoKrh1GwhUH2svFHoP8cMdB3kZjZX0scl/bn75oQt9H1e0KaQdyascji1O1Ye8s8xI0n0yFdL+oak7AfbM8ZqyJIJudytyuFUXyBko0/9XNT3IOSfY0a6CnIzu05Sn7s/F+O2N5pZr5n17t27t5uHzQ2rIUsm5J0Jqx5O7YyVh/xzzMiIQW5mvzazP0T8WSbpNkmxtnFz93vdvcfde6ZPn95tu3PBasiSiXoLH8rOhIRT/O9ByD/HjIwY5O7+MXe/qPGPpF2Szpb0nJm9KmmOpG1mNiPdJueH+u1BZZmEC3lfa8Ip/vcg5J9jRhKrIx8M8x53/8tIt6WOvMAaT3ORav+5KvYfJxM71lVz+9uh+B4M06yOnCBHe1Zf1GTvaBZrAGlrFuSJ7bXi7vOTui8ErOqTcECAWNmJ9jAJBwSHIEd7mIQDgkOQoz1UCADBYT9ytK/KZ1ACAaJHDgAFR5ADQMER5ABQcAQ5ABQcQQ4ABUeQA0DB5XL4spntlfRa5g+cjzMkjbj/TAlU5XlKPNeyKsJzPcvdT9kHPJcgrxIz643a5KZsqvI8JZ5rWRX5uTK0AgAFR5ADQMER5Om7N+8GZKQqz1PiuZZVYZ8rY+QAUHD0yAGg4AhyACg4gjwjZvZ1M3MzOyPvtqTFzO4ysxfMbIeZ/dzMpubdpqSZ2TVm9qKZvWxmK/NuT1rMbK6ZPWFmO83seTNbkXeb0mRmo81su5n9Iu+2dIIgz4CZzZX0cUl/zrstKXtU0kXuvkDSS5Juzbk9iTKz0ZJ+JOkTki6U9FkzuzDfVqVmQNLX3P29kj4k6R9K/FwlaYWknXk3olMEeTZWS/qGpFLPLLv7JncfGPzwaUllO8jzg5Jedvdd7v6OpPslLcu5Talw93533zb477+pFnKz821VOsxsjqQlkn6cd1s6RZCnzMyuk9Tn7s/l3ZaMfUnSr/JuRMJmS3p9yMdvqKThNpSZzZd0saRn8m1Jau5WraN1PO+GdIqj3hJgZr+WNCPiU7dJ+mdJi7NtUXpaPVd3Xz94m9tUe2u+Nsu2ZcAirpX6XZaZTZb0oKSb3P2vebcnaWZ2raQ33X2rmS3Muz2dIsgT4O4fi7puZu+XdLak58xMqg01bDOzD7r7ngybmJhmz7XOzL4o6VpJi7x8ixTekDR3yMdzJO3OqS2pM7OxqoX4Wnd/KO/2pORySdeZ2SclTZB0upn9xN0/n3O72sKCoAyZ2auSetw99B3WOmJm10j6gaQr3X1v3u1JmpmNUW0Sd5GkPkm/k/T37v58rg1LgdV6HvdJ2ufuN+XdniwM9si/7u7X5t2WdjFGjiT9UNJpkh41s2fN7F/zblCSBidyvyLpEdUm/9aVMcQHXS7pC5KuGvxZPjvYa0WA6JEDQMHRIweAgiPIAaDgCHIAKDiCHAAKjiAHgIIjyAGg4AhyACi4/wf9MG2z8av/6AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "showDataSet(dataMat, label_test_01,myCentroids01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#以下不看"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "def distSLC(vecA, vecB):\n",
    "    '''\n",
    "    返回地球表面两点间的距离,单位是英里\n",
    "    给定两个点的经纬度,可以使用球面余弦定理来计算亮点的距离\n",
    "    :param vecA:\n",
    "    :param vecB:\n",
    "    :return:\n",
    "    '''\n",
    "    # 经度和维度用角度作为单位,但是sin()和cos()以弧度为输入.\n",
    "    # 可以将江都除以180度然后再诚意圆周率pi转换为弧度\n",
    "    a = sin(vecA[0, 1] * pi / 180) * sin(vecB[0, 1] * pi / 180)\n",
    "    b = cos(vecA[0, 1] * pi / 180) * cos(vecB[0, 1] * pi / 180) * \\\n",
    "        cos(pi * (vecB[0, 0] - vecA[0, 0]) / 180)\n",
    "    return arccos(a + b) * 6371.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clusterClubs(fileName, imgName, numClust=5):\n",
    "    # 创建一个空列表\n",
    "    datList = []\n",
    "    # 打开文本文件获取第4列和第5列,这两列分别对应维度和经度,然后将这些值封装到datList\n",
    "    for line in open(fileName).readlines():\n",
    "        lineArr = line.split('\\t')\n",
    "        datList.append([float(lineArr[4]), float(lineArr[3])])\n",
    "    datMat = mat(datList)\n",
    "    # 调用biKmeans并使用distSLC函数作为聚类中使用的距离计算方式\n",
    "    myCentroids, clustAssing = biKmeans(datMat, numClust, distMeas=distSLC)\n",
    "    # 创建一幅图和一个举行,使用该矩形来决定绘制图的哪一部分\n",
    "    fig = plt.figure()\n",
    "    rect = [0.1, 0.1, 0.8, 0.8]\n",
    "    # 构建一个标记形状的列表用于绘制散点图\n",
    "    scatterMarkers = ['s', 'o', '^', '8', 'p', 'd', 'v', 'h', '>', '<']\n",
    "    axprops = dict(xticks=[], yticks=[])\n",
    "    ax0 = fig.add_axes(rect, label='ax0', **axprops)\n",
    "    # 使用imread函数基于一幅图像来创建矩阵\n",
    "    imgP = plt.imread(imgName)\n",
    "    # 使用imshow绘制该矩阵\n",
    "    ax0.imshow(imgP)\n",
    "    # 再同一幅图上绘制一张新图,允许使用两套坐标系统并不做任何缩放或偏移\n",
    "    ax1 = fig.add_axes(rect, label='ax1', frameon=False)\n",
    "    # 遍历每一个簇并将它们一一画出来,标记类型从前面创建的scatterMarkers列表中得到\n",
    "    for i in range(numClust):\n",
    "        ptsInCurrCluster = datMat[nonzero(clustAssing[:, 0].A == i)[0], :]\n",
    "        # 使用索引i % len(scatterMarkers)来选择标记形状,这意味这当有更多簇时,可以循环使用这标记\n",
    "        markerStyle = scatterMarkers[i % len(scatterMarkers)]\n",
    "        # 使用十字标记来表示簇中心并在图中显示\n",
    "        ax1.scatter(ptsInCurrCluster[:, 0].flatten().A[0], ptsInCurrCluster[:, 1].flatten().A[0], \n",
    "                    marker=markerStyle,s=90)\n",
    "    ax1.scatter(myCentroids[:, 0].flatten().A[0], myCentroids[:, 1].flatten().A[0], marker='+', s=300)\n",
    "    plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
